# run this code if the plotly express is not installed
#conda install -c plotly plotly_express
# run this code if the missingno library is not installed
#conda install -c conda-forge missingno
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import missingno as msno
import plotly.express as px
import seaborn as sns
unsco_sclclsr_main = pd.read_excel('UNESCO_school_closures_database.xlsx',
sheet_name = 'database')
#datalink:http://covid19.uis.unesco.org/global-monitoring-school-closures-covid19/
unsco_sclclsr_main.head()
| Date | Country ID | Country | Region Type 1 | Region 1 | Region Type 2 | Region 2 | Region Type 3 | Region 3 | Status | Partially Open Type (As of October 31, 2021) | Enrolment (Pre-Primary to Tertiary) | Teachers (Pre-Primary to Upper Secondary) | School Age Population (Pre-Primary to Upper Secondary) | Distance learning modalities (TV) | Distance learning modalities (Radio) | Distance learning modalities (Online) | Distance learning modalities (Global) | Weeks partially open | Weeks fully closed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2020-02-16 | ABW | Aruba | EC | ECLAC: Latin America and the Caribbean | SDG | SDG: Latin America and the Caribbean | WB Income | WB: High income | Fully open | NaN | 22195.0 | 1211.076660 | 16731 | No | No | Yes | Online | 7.714286 | 4.857143 |
| 1 | 2020-02-16 | AFG | Afghanistan | EC | ESCAP: Asia and the Pacific | SDG | SDG: Asia (Central and Southern) | WB Income | WB: Low income | Fully open | Gender | 10483711.0 | 229635.000000 | 13266916 | Yes | Yes | Yes | TV + Online + Radio | 29.857143 | 23.000000 |
| 2 | 2020-02-16 | AGO | Angola | EC | ECA: Africa | SDG | SDG: Africa (Sub-Saharan) | WB Income | WB: Lower middle income | Fully open | NaN | 8692733.0 | 184264.000000 | 12768266 | Yes | No | No | TV | 27.857143 | 18.285714 |
| 3 | 2020-02-16 | AIA | Anguilla | EC | ECLAC: Latin America and the Caribbean | SDG | SDG: Latin America and the Caribbean | NaN | NaN | Fully open | Grades | 3125.0 | 220.287598 | 2826 | No | No | No | None | 7.000000 | 12.714286 |
| 4 | 2020-02-16 | ALB | Albania | EC | ECE: Europe | SDG | SDG: Northern America and Europe | WB Income | WB: Upper middle income | Fully open | NaN | 612534.0 | 38535.000000 | 522479 | Yes | Yes | Yes | TV + Online + Radio | 11.285714 | 17.428571 |
Global Monitoring of School Closures Data & UNESCO school closures data
#print(school_closure_main.isna().sum())
print('UNESCO records',unsco_sclclsr_main.shape)
print(unsco_sclclsr_main.isna().sum())
UNESCO records (131040, 20) Date 0 Country ID 0 Country 0 Region Type 1 3744 Region 1 3744 Region Type 2 0 Region 2 0 Region Type 3 3744 Region 3 3744 Status 0 Partially Open Type (As of October 31, 2021) 99216 Enrolment (Pre-Primary to Tertiary) 0 Teachers (Pre-Primary to Upper Secondary) 0 School Age Population (Pre-Primary to Upper Secondary) 0 Distance learning modalities (TV) 0 Distance learning modalities (Radio) 0 Distance learning modalities (Online) 0 Distance learning modalities (Global) 0 Weeks partially open 0 Weeks fully closed 0 dtype: int64
#school_closure_main['Date'] = pd.to_datetime(school_closure_main['Date'])
unsco_sclclsr_main['Date'] = pd.to_datetime(unsco_sclclsr_main['Date'])
#df_schl_main = school_closure_main.groupby(['Country','Status']).agg({'ISO':'count'}).reset_index()
#df_schl_main = df_schl_main.rename(columns = {'ISO':'Status_Count'})
#df_schl_main = df_schl_main.sort_values(by=['Status_Count'],ascending=False)
#df_schl_main_closed = df_schl_main[(df_schl_main['Status'] == 'Closed due to COVID-19')].head(10)
#df_schl_main_paropen = df_schl_main[(df_schl_main['Status'] == 'Partially open')].head(10)
#df_schl_main_closed_shortest10 = df_schl_main[(df_schl_main['Status'] == 'Closed due to COVID-19')].tail(10)
#df_schl_main_paropen_shortest10 = df_schl_main[(df_schl_main['Status'] == 'Partially open')].tail(10)
#df_schl_main_closed.head()
# calculation for unesco data
unsco_sclclsr_data = unsco_sclclsr_main.copy()
unsco_sclclsr_data = unsco_sclclsr_data.groupby(['Country','Status']).agg({'Country ID':'count'}).reset_index()
unsco_sclclsr_data = unsco_sclclsr_data.rename(columns = {'Country ID':'Status_Count'})
unsco_sclclsr_data = unsco_sclclsr_data.sort_values(by=['Status_Count'],ascending=False)
unsco_sclclsr_data_closed10 = unsco_sclclsr_data[(unsco_sclclsr_data['Status'] == 'Closed due to COVID-19')].head(10)
unsco_sclclsr_data_paropen10 = unsco_sclclsr_data[(unsco_sclclsr_data['Status'] == 'Partially open')].head(10)
unsco_sclclsr_data_shortest10 = unsco_sclclsr_data[(unsco_sclclsr_data['Status'] == 'Closed due to COVID-19')].tail(10)
unsco_sclclsr_data_paropen_shortest10 = unsco_sclclsr_data[(unsco_sclclsr_data['Status'] == 'Partially open')].tail(10)
print('unsco_sclclsr_main.shape:',unsco_sclclsr_main.shape)
print('unsco_sclclsr_data:',unsco_sclclsr_data.shape)
print('unsco_sclclsr_data_closed10:',unsco_sclclsr_data_closed10.shape)
print('unsco_sclclsr_data_paropen10:',unsco_sclclsr_data_paropen10.shape)
print('unsco_sclclsr_data_shortest10:',unsco_sclclsr_data_shortest10.shape)
print('unsco_sclclsr_data_paropen_shortest10:',unsco_sclclsr_data_paropen_shortest10.shape)
unsco_sclclsr_data_closed10.head()
unsco_sclclsr_main.shape: (131040, 20) unsco_sclclsr_data: (808, 3) unsco_sclclsr_data_closed10: (10, 3) unsco_sclclsr_data_paropen10: (10, 3) unsco_sclclsr_data_shortest10: (10, 3) unsco_sclclsr_data_paropen_shortest10: (10, 3)
| Country | Status | Status_Count | |
|---|---|---|---|
| 60 | Bangladesh | Closed due to COVID-19 | 439 |
| 388 | Kuwait | Closed due to COVID-19 | 431 |
| 789 | Venezuela | Closed due to COVID-19 | 424 |
| 499 | Myanmar | Closed due to COVID-19 | 414 |
| 326 | Honduras | Closed due to COVID-19 | 409 |
# check if any country has only status as fully open
unsco_sclclsr_data_fo = unsco_sclclsr_data.copy()
unsco_sclclsr_data_fo = unsco_sclclsr_data_fo.sort_values(['Country','Status'])
unsco_sclclsr_data_fo = unsco_sclclsr_data_fo.groupby(['Country']).agg({'Status':'count'}).reset_index()
unsco_sclclsr_data_fo = unsco_sclclsr_data_fo[unsco_sclclsr_data_fo['Status'] <= 2]
display(unsco_sclclsr_data_fo)
unsco_sclclsr_data_fo_result = pd.merge(unsco_sclclsr_data_fo,unsco_sclclsr_data,on='Country',how='inner')
#unsco_sclclsr_data_fo_result.head(10)
#print(unsco_sclclsr_data_fo_result.shape)
unsco_sclclsr_data_fo_result = unsco_sclclsr_data_fo_result[['Country','Status_y','Status_Count']]
unsco_sclclsr_data_fo_result = unsco_sclclsr_data_fo_result.rename(columns={'Status_y':"Status"})
display(unsco_sclclsr_data_fo_result)
fig = px.bar(unsco_sclclsr_data_fo_result, y ='Status_Count', x = 'Country',
text = 'Status_Count',
title ="Countries did not have any closure due to covid between Feb/16/2020 and Oct/31/2021",
labels = {'Status_Count':'No of Days'},
color = 'Status'
)
fig.show()
| Country | Status | |
|---|---|---|
| 17 | Belarus | 2 |
| 31 | Burundi | 2 |
| 130 | Nauru | 2 |
| 184 | Tajikistan | 2 |
| Country | Status | Status_Count | |
|---|---|---|---|
| 0 | Belarus | Fully open | 425 |
| 1 | Belarus | Academic break | 199 |
| 2 | Burundi | Fully open | 457 |
| 3 | Burundi | Academic break | 167 |
| 4 | Nauru | Fully open | 571 |
| 5 | Nauru | Academic break | 53 |
| 6 | Tajikistan | Fully open | 410 |
| 7 | Tajikistan | Academic break | 214 |
fig = px.bar(unsco_sclclsr_data_closed10, y ='Status_Count', x = 'Country',
text = 'Status_Count',
title ="Top 10 Longest Fully closed schools from Feb/16/2020 to Oct/31/2021",
labels = {'Status_Count':'No of Days'}
)
fig.show()
fig = px.bar(unsco_sclclsr_data_paropen10, y ='Status_Count', x = 'Country',
text = 'Status_Count',
title ="Top 10 Longest Partially closed Schools Status from Feb/16/2020 to Oct/31/2021",
labels = {'Status_Count':'No of Days'}
)
fig.show()
fig = px.bar(unsco_sclclsr_data_shortest10, y ='Status_Count', x = 'Country',
text = 'Status_Count',
title ="Country wise 10 Shortest School Closures from Feb/16/2020 to Oct/31/2021",
labels = {'Status_Count':'No of Days'}
)
fig.update_xaxes(tickangle=45)
fig.show()
fig = px.bar(unsco_sclclsr_data_paropen_shortest10, y ='Status_Count', x = 'Country',
text = 'Status_Count',
title ="Country wise 10 Shortest Partially School Closures Status from Feb/16/2020 to Oct/31/2021",
labels = {'Status_Count':'No of Days'}
)
fig.update_xaxes(tickangle=45)
fig.show()
top10_closed = unsco_sclclsr_data_closed10.Country.tolist()
top10_paropen = unsco_sclclsr_data_paropen10.Country.tolist()
# fully closed schools
unsco_fullyclosed_top10 = unsco_sclclsr_main[ (unsco_sclclsr_main.Country.isin(top10_closed)) &
(unsco_sclclsr_main['Status'] == 'Closed due to COVID-19')]
print(unsco_fullyclosed_top10.shape)
# partially open schools
unsco_parclosed_top10 = unsco_sclclsr_main[ (unsco_sclclsr_main.Country.isin(top10_paropen)) &
(unsco_sclclsr_main['Status'] == 'Partially open')]
print(unsco_parclosed_top10.shape)
(4028, 20) (3792, 20)
# fully closed top 10
fig = px.scatter(unsco_fullyclosed_top10, x ='Date', y = 'Country',
color='Country',
title ="Top 10 Country with longest fully closed schools due to COVID-19 from Feb-2020 to Oct-2021",
labels = {'Date':'Time Period'},
category_orders= { "Country": top10_closed }
)
fig.show()
# Partially open top 10
fig = px.scatter(unsco_parclosed_top10, x ='Date', y = 'Country',
color='Country',
title ="Top 10 Country with longest partially opened schools due to COVID-19 from Feb-2020 to Oct-2021",
labels = {'Date':'Time Period'},
category_orders= { "Country": top10_paropen }
)
fig.show()
shortest10_closed = unsco_sclclsr_data_shortest10.Country.tolist()
shortest10_paropen = unsco_sclclsr_data_paropen_shortest10.Country.tolist()
# fully closed schools
school_closed_shortest10 = unsco_sclclsr_main[ (unsco_sclclsr_main.Country.isin(shortest10_closed)) &
(unsco_sclclsr_main['Status'] == 'Closed due to COVID-19')]
print(school_closed_shortest10.shape)
# partially open schools
school_parclosed_shortest10 = unsco_sclclsr_main[ (unsco_sclclsr_main.Country.isin(shortest10_paropen)) &
(unsco_sclclsr_main['Status'] == 'Partially open')]
print(school_parclosed_shortest10.shape)
(194, 20) (78, 20)
# fully closed shortes 10
fig = px.scatter(school_closed_shortest10, x ='Date', y = 'Country',
color='Country',
title ="Top 10 Country with shortest fully closed schools due to COVID-19 From Feb-2020 to Oct-2021",
category_orders= { "Country": shortest10_closed },
labels = {'Date':'Time Period'}
)
fig.show()
# Partially open top 10
fig = px.scatter(school_parclosed_shortest10, x ='Date', y = 'Country',
color='Country',
title ="Top 10 Country with shortest partially opened schools due to COVID-19 from Feb-2020 to Oct-2021",
category_orders= { "Country": shortest10_paropen },
labels = {'Date':'Time Period'}
)
fig.show()
unsco_sclclsr = unsco_sclclsr_main[['Date','Country','Status']].copy()
unsco_sclclsr = unsco_sclclsr.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr = unsco_sclclsr.rename(columns={0:'Country'})
print(unsco_sclclsr.head())
fig = px.line(unsco_sclclsr, x ='Date', y = 'Country',
color='Status',
title ="Global School Closures status due to COVID-19 from Feb-2020 to Oct-2021",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.update_xaxes(tickangle=45)
fig.show()
# Data link: http://covid19.uis.unesco.org/global-monitoring-school-closures-covid19/
Status Date Country 0 Academic break 2020-03-02 1 1 Academic break 2020-03-03 1 2 Academic break 2020-03-04 1 3 Academic break 2020-03-05 1 4 Academic break 2020-03-06 1
#unsco_sclclsr = unsco_sclclsr[(unsco_sclclsr['Date'] <= '2021-03-01')]
fig = px.area(unsco_sclclsr, x="Date", y="Country",
color="Status",line_group="Country",
title ="Global School Closures status due to COVID-19 from Feb-2020 to Oct-2021",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.show()
import plotly.graph_objects as go
#import plotly.io as pio
from plotly.subplots import make_subplots
# subplot setup
fig = make_subplots(rows=4, cols=1, shared_xaxes=True)
#unsco_sclclsr = unsco_sclclsr[(unsco_sclclsr['Date'] <= '2021-03-03')]
unsco_sclclsr_fullyclosed = unsco_sclclsr[unsco_sclclsr['Status'] == 'Closed due to COVID-19'].copy()
fig.append_trace( go.Scatter(x = unsco_sclclsr_fullyclosed["Date"].tolist(),
y = unsco_sclclsr_fullyclosed["Country"].tolist(),
name = "Closed due to COVID-19",
fill='tozeroy',line_color='#c0241c'
), row =1, col=1
)
unsco_sclclsr_paropen = unsco_sclclsr[unsco_sclclsr['Status'] == 'Partially open']
fig.append_trace(go.Scatter(x = unsco_sclclsr_paropen["Date"].tolist(),
y = unsco_sclclsr_paropen["Country"].tolist(),
name = "Partially open",fill='tozeroy',line_color='#ffcc24'
), row = 2, col=1
)
unsco_sclclsr_acdbrk = unsco_sclclsr[unsco_sclclsr['Status'] == 'Academic break']
fig.append_trace (go.Scatter(x = unsco_sclclsr_acdbrk["Date"].tolist(),
y = unsco_sclclsr_acdbrk["Country"].tolist(),
name = "Academic break",fill='tozeroy',line_color='#c0bcbc'
), row =3, col =1
)
unsco_sclclsr_fullopen = unsco_sclclsr[unsco_sclclsr['Status'] == 'Fully open']
fig.append_trace (go.Scatter(x = unsco_sclclsr_fullopen["Date"].tolist(),
y = unsco_sclclsr_fullopen["Country"].tolist(),
name = "Fully open",fill='tozeroy',line_color='#204c7c'
), row =4, col =1
)
fig.update_xaxes(tickangle=45,title_text="Time Period",row =4, col =1)
fig.update_yaxes(title_text="No of Countries")
fig.update_layout(height=800, width=800, title_text="Global Covid School Closures Analysis for Different Statuses",
showlegend= True)
fig.show()
unsco_sclclsr_main.isna().sum()
Date 0 Country ID 0 Country 0 Region Type 1 3744 Region 1 3744 Region Type 2 0 Region 2 0 Region Type 3 3744 Region 3 3744 Status 0 Partially Open Type (As of October 31, 2021) 99216 Enrolment (Pre-Primary to Tertiary) 0 Teachers (Pre-Primary to Upper Secondary) 0 School Age Population (Pre-Primary to Upper Secondary) 0 Distance learning modalities (TV) 0 Distance learning modalities (Radio) 0 Distance learning modalities (Online) 0 Distance learning modalities (Global) 0 Weeks partially open 0 Weeks fully closed 0 dtype: int64
unsco_sclclsr_main["Region 2"].unique()
array(['SDG: Latin America and the Caribbean',
'SDG: Asia (Central and Southern)', 'SDG: Africa (Sub-Saharan)',
'SDG: Northern America and Europe',
'SDG: Western Asia and Northern Africa', 'SDG: Oceania',
'SDG: Asia (Eastern and South-eastern)'], dtype=object)
unsco_sclclsr = unsco_sclclsr_main[['Date','Country','Status']].copy()
unsco_sclclsr = unsco_sclclsr.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr = unsco_sclclsr.rename(columns={0:'Country'})
unsco_sclclsr_regional = unsco_sclclsr_main[['Date','Country','Region 2','Status']].copy()
#unsco_sclclsr = unsco_sclclsr[(unsco_sclclsr['Date'] <= '2021-03-01')]
unsco_sclclsr_LA = unsco_sclclsr_regional[unsco_sclclsr_regional['Region 2'] == 'SDG: Latin America and the Caribbean']
unsco_sclclsr_LA = unsco_sclclsr_LA[['Date','Country','Status']]
#unsco_sclclsr_LA = unsco_sclclsr_LA.groupby(['Status','Date']).nunique().reset_index()
unsco_sclclsr_LA = unsco_sclclsr_LA.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr_LA = unsco_sclclsr_LA.rename(columns={0:'Country'})
unsco_sclclsr_LA.head()
| Status | Date | Country | |
|---|---|---|---|
| 0 | Academic break | 2020-03-16 | 1 |
| 1 | Academic break | 2020-03-17 | 1 |
| 2 | Academic break | 2020-03-18 | 1 |
| 3 | Academic break | 2020-03-19 | 1 |
| 4 | Academic break | 2020-03-20 | 1 |
fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
fig = px.area(unsco_sclclsr_LA, x="Date", y="Country",
color="Status",line_group="Country",
title ="Latin America & Caribbean School Closures status due to COVID-19",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500, width=900)
fig.show()
unsco_sclclsr_AsiaCS = unsco_sclclsr_regional[unsco_sclclsr_regional['Region 2'].isin(
['SDG: Asia (Central and Southern)'])]
unsco_sclclsr_AsiaCS = unsco_sclclsr_AsiaCS[['Date','Country','Status']]
#unsco_sclclsr_AsiaCS = unsco_sclclsr_AsiaCS.groupby(['Status','Date']).nunique().reset_index()
unsco_sclclsr_AsiaCS = unsco_sclclsr_AsiaCS.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr_AsiaCS = unsco_sclclsr_AsiaCS.rename(columns={0:'Country'})
#unsco_sclclsr_AsiaAfrica.head()
fig = px.area(unsco_sclclsr_AsiaCS, x="Date", y="Country",
color="Status",line_group="Country",
title ="Asia Central and Southern School Closures status due to COVID-19",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500, width=900)
fig.show()
unsco_sclclsr_AfricaSS = unsco_sclclsr_regional[unsco_sclclsr_regional['Region 2'].isin(
['SDG: Africa (Sub-Saharan)'])]
unsco_sclclsr_AfricaSS = unsco_sclclsr_AfricaSS[['Date','Country','Status']]
#unsco_sclclsr_AfricaSS = unsco_sclclsr_AfricaSS.groupby(['Status','Date']).nunique().reset_index()
unsco_sclclsr_AfricaSS = unsco_sclclsr_AfricaSS.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr_AfricaSS = unsco_sclclsr_AfricaSS.rename(columns={0:'Country'})
#unsco_sclclsr_AsiaAfrica.head()
fig = px.area(unsco_sclclsr_AfricaSS, x="Date", y="Country",
color="Status",line_group="Country",
title ="Africa Sub-Saharan School Closures status due to COVID-19",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500, width=900)
fig.show()
unsco_sclclsr_NAEUR = unsco_sclclsr_regional[unsco_sclclsr_regional['Region 2'].isin(
['SDG: Northern America and Europe'])]
unsco_sclclsr_NAEUR = unsco_sclclsr_NAEUR[['Date','Country','Status']]
#unsco_sclclsr_NAEUR = unsco_sclclsr_NAEUR.groupby(['Status','Date']).nunique().reset_index()
unsco_sclclsr_NAEUR = unsco_sclclsr_NAEUR.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr_NAEUR = unsco_sclclsr_NAEUR.rename(columns={0:'Country'})
#unsco_sclclsr_AsiaAfrica.head()
fig = px.area(unsco_sclclsr_NAEUR, x="Date", y="Country",
color="Status",line_group="Country",
title ="Northen America and Europe School Closures status due to COVID-19",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500, width=900)
unsco_sclclsr_WANAF = unsco_sclclsr_regional[unsco_sclclsr_regional['Region 2'].isin(
['SDG: Western Asia and Northern Africa'])]
unsco_sclclsr_WANAF = unsco_sclclsr_WANAF[['Date','Country','Status']]
#unsco_sclclsr_WANAF = unsco_sclclsr_WANAF.groupby(['Status','Date']).nunique().reset_index()
unsco_sclclsr_WANAF = unsco_sclclsr_WANAF.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr_WANAF = unsco_sclclsr_WANAF.rename(columns={0:'Country'})
#unsco_sclclsr_AsiaAfrica.head()
fig = px.area(unsco_sclclsr_WANAF, x="Date", y="Country",
color="Status",line_group="Country",
title ="Western Asis and Northen Africa School Closures status due to COVID-19",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500, width=900)
unsco_sclclsr_Oceania = unsco_sclclsr_regional[unsco_sclclsr_regional['Region 2'].isin(
['SDG: Oceania'])]
unsco_sclclsr_Oceania = unsco_sclclsr_Oceania[['Date','Country','Status']]
#unsco_sclclsr_Oceania = unsco_sclclsr_Oceania.groupby(['Status','Date']).nunique().reset_index()
unsco_sclclsr_Oceania = unsco_sclclsr_Oceania.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr_Oceania = unsco_sclclsr_Oceania.rename(columns={0:'Country'})
#unsco_sclclsr_AsiaAfrica.head()
fig = px.area(unsco_sclclsr_Oceania, x="Date", y="Country",
color="Status",line_group="Country",
title ="Oceania School Closures status due to COVID-19",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500, width=900)
unsco_sclclsr_AsiaES = unsco_sclclsr_regional[unsco_sclclsr_regional['Region 2'].isin(
['SDG: Asia (Eastern and South-eastern)'])]
unsco_sclclsr_AsiaES = unsco_sclclsr_AsiaES[['Date','Country','Status']]
#unsco_sclclsr_AsiaES = unsco_sclclsr_AsiaES.groupby(['Status','Date']).nunique().reset_index()
unsco_sclclsr_AsiaES = unsco_sclclsr_AsiaES.groupby(['Status','Date']).size().reset_index()
unsco_sclclsr_AsiaES = unsco_sclclsr_AsiaES.rename(columns={0:'Country'})
#unsco_sclclsr_AsiaAfrica.head()
fig = px.area(unsco_sclclsr_AsiaES, x="Date", y="Country",
color="Status",line_group="Country",
title ="Asis Eastern and South-Eastern School Closures status due to COVID-19",
labels = {'Country':'No of Countries',
'Date':'Time Period'},
color_discrete_map={"Closed due to COVID-19": "#c0241c",
"Partially open": "#ffcc24",
"Academic break": "#c0bcbc",
"Fully open": "#204c7c"},
category_orders= { "Status": ["Closed due to COVID-19","Partially open","Fully open",
"Academic break"]}
)
fig.for_each_trace(lambda trace: trace.update(fillcolor = trace.line.color))
fig.update_xaxes(tickangle=45)
fig.update_layout(height=500, width=900)
school_closure_main_smry = unsco_sclclsr_main.copy()
# derive month year
school_closure_main_smry['month_year'] = school_closure_main_smry['Date'].dt.strftime('%b,%Y')
school_closure_main_smry = school_closure_main_smry.groupby(['Country','Status','month_year']).agg({'Country ID':'count'}) .reset_index()
school_closure_main_smry = school_closure_main_smry.rename(columns = {'Country ID':'Status_Count'})
#school_closure_main_smry = school_closure_main_smry.sort_values(['Status_Count'],ascending=False)
#school_closure_main_smry = school_closure_main_smry[school_closure_main_smry['Status'] == 'Closed due to COVID-19']
# sort by months
months = ["Jan,2020", "Feb,2020", "Mar,2020", "Apr,2020", "May,2020", "Jun,2020", "Jul,2020",
"Aug,2020", "Sep,2020", "Oct,2020", "Nov,2020", "Dec,2020",
"Jan,2021", "Feb,2021", "Mar,2021", "Apr,2021", "May,2021", "Jun,2021", "Jul,2021",
"Aug,2021", "Sep,2021", "Oct,2021", "Nov,2021", "Dec,2021"]
school_closure_main_smry['month_year'] = pd.Categorical(school_closure_main_smry['month_year'], categories=months, ordered=True)
school_closure_main_smry = school_closure_main_smry.sort_values(by=['month_year','Status_Count'],
ascending=[True,False])
school_closure_main_smry_fullyclosed = school_closure_main_smry[school_closure_main_smry['Status'] ==
'Closed due to COVID-19']
# filter for top 10 countries
#school_closure_main_smry = school_closure_main_smry[school_closure_main_smry.Country.isin(top10_closed)]
# filter for top 5 fully closed countries
top5_closed = top10_closed[:5]
school_closure_main_smry_fullyclosed = school_closure_main_smry_fullyclosed[
school_closure_main_smry_fullyclosed.Country.isin(top5_closed)]
print('fully closed records count:',school_closure_main_smry_fullyclosed.shape[0])
#school_closure_main_smry_fullyclosed.head()
# Partially opened school
school_closure_main_smry_paropen = school_closure_main_smry[school_closure_main_smry['Status'] == 'Partially open']
# filter for top 5
top5_paropen = top10_paropen[:5]
school_closure_main_smry_paropen = school_closure_main_smry_paropen[
school_closure_main_smry_paropen.Country.isin(top5_paropen)]
print('partially opened records count:',school_closure_main_smry_paropen.shape[0])
school_closure_main_smry_paropen.head()
fully closed records count: 81 partially opened records count: 75
| Country | Status | month_year | Status_Count | |
|---|---|---|---|---|
| 6002 | United States of America | Partially open | Feb,2020 | 2 |
| 6007 | United States of America | Partially open | Mar,2020 | 23 |
| 2611 | India | Partially open | Mar,2020 | 21 |
| 5998 | United States of America | Partially open | Apr,2020 | 30 |
| 6009 | United States of America | Partially open | May,2020 | 31 |
school_closure_main_smry_fullyclosed.month_year.unique()
['Mar,2020', 'Apr,2020', 'May,2020', 'Jun,2020', 'Jul,2020', ..., 'Jun,2021', 'Jul,2021', 'Aug,2021', 'Sep,2021', 'Oct,2021'] Length: 20 Categories (24, object): ['Jan,2020' < 'Feb,2020' < 'Mar,2020' < 'Apr,2020' ... 'Sep,2021' < 'Oct,2021' < 'Nov,2021' < 'Dec,2021']
fig = px.histogram(school_closure_main_smry_fullyclosed, x ='month_year', y = 'Status_Count',
color='Country',
title ="Number of days fully closed schools status for top 5 country due to COVID-19 ",
labels = {'Status_Count':'No of Days School Closed',
'month_year':'Time Period'},
nbins = 20
)
fig.show()
fig = px.histogram(school_closure_main_smry_paropen, x ='month_year', y = 'Status_Count',
color='Country',
title ="Number of days partially open schools status for top 5 country due to COVID-19",
labels = {'Status_Count':'No of Days School Opened',
'month_year':'Time Period'},
nbins = 20
)
fig.update_xaxes(tickangle=45)
fig.show()
#school_enrollment = pd.read_csv('Enrollment_by_level_of_education.csv')
#school_enrollment.head()
print(top10_closed)
print(top10_paropen)
['Bangladesh', 'Kuwait', 'Venezuela', 'Myanmar', 'Honduras', 'Uganda', 'Philippines', 'Panama', 'Mexico', 'Iraq'] ['United States of America', 'Republic of Korea', 'Chile', 'Argentina', 'India', 'Indonesia', 'Ghana', 'Colombia', 'Bhutan', 'Grenada']
# 2019 enrollment verification with closures countries
enrl_country_list = school_enrollment[(school_enrollment['TIME'] == 2019)]
school_enrollment_top10_2019 = enrl_country_list[(enrl_country_list.Country.isin(top10_closed))]
print('Matching countries for 2019:',school_enrollment_top10_2019.Country.unique())# 4 missing
# 2020 enrollment verification with closures countries
enrl_country_list = school_enrollment[(school_enrollment['TIME'] == 2020)]
school_enrollment_top10_2020 = enrl_country_list[(enrl_country_list.Country.isin(top10_closed))]
print('Matching countries for 2020:',school_enrollment_top10_2020.Country.unique())# 4 missing
# 2021 enrollment verification with closures countries
enrl_country_list = school_enrollment[(school_enrollment['TIME'] == 2021)]
school_enrollment_top10_2021 = enrl_country_list[(enrl_country_list.Country.isin(top10_closed))]
print('Matching countries for 2021:',school_enrollment_top10_2021.Country.unique())# no match
# check for top 10 school closed country if exists
school_enrollment_top10 = enrl_country_list[(enrl_country_list.Country.isin(top10_closed))]
school_enrollment_top10.Country.unique()
unsco_sclclsr_main.dtypes
global_gdp_main = pd.read_excel("API_NY.GDP.MKTP.CD_DS2_en_excel_v2_3469550.xls",skiprows=3)
global_gdp_main.columns
# data link: https://data.worldbank.org/indicator/NY.GDP.MKTP.CD
Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
'1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
'1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
'1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
'1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
'1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
'2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
'2014', '2015', '2016', '2017', '2018', '2019', '2020'],
dtype='object')
# clean the data for visualization
global_gdp = global_gdp_main[["Country Name","Country Code", "2020"]].copy()
# drop NA here
global_gdp.dropna(inplace=True)
global_gdp.head()
| Country Name | Country Code | 2020 | |
|---|---|---|---|
| 1 | Africa Eastern and Southern | AFE | 9.188155e+11 |
| 2 | Afghanistan | AFG | 2.011614e+10 |
| 3 | Africa Western and Central | AFW | 7.845876e+11 |
| 4 | Angola | AGO | 5.837598e+10 |
| 5 | Albania | ALB | 1.488763e+10 |
unsco_sclclsr_main.columns
Index(['Date', 'Country ID', 'Country', 'Region Type 1', 'Region 1',
'Region Type 2', 'Region 2', 'Region Type 3', 'Region 3', 'Status',
'Partially Open Type (As of October 31, 2021)',
'Enrolment (Pre-Primary to Tertiary)',
'Teachers (Pre-Primary to Upper Secondary)',
'School Age Population (Pre-Primary to Upper Secondary)',
'Distance learning modalities (TV)',
'Distance learning modalities (Radio)',
'Distance learning modalities (Online)',
'Distance learning modalities (Global)', 'Weeks partially open',
'Weeks fully closed'],
dtype='object')
sclclsr_forgdp = unsco_sclclsr_main[['Date','Country ID','Country','Status']].copy()
# keep records for 2020 only as per GDP data
sclclsr_forgdp = sclclsr_forgdp[(sclclsr_forgdp['Date'] < '2021-01-01')]
# group by country and status to get school closed counts:
sclclsr_forgdp = sclclsr_forgdp.groupby(['Country ID', 'Status']).size().reset_index(name='counts')
# filter for fully closed
sclclsr_forgdp = sclclsr_forgdp[(sclclsr_forgdp['Status'] == 'Closed due to COVID-19')]
sclclsr_forgdp.head()
| Country ID | Status | counts | |
|---|---|---|---|
| 1 | ABW | Closed due to COVID-19 | 54 |
| 5 | AFG | Closed due to COVID-19 | 161 |
| 8 | AGO | Closed due to COVID-19 | 195 |
| 12 | AIA | Closed due to COVID-19 | 28 |
| 16 | ALB | Closed due to COVID-19 | 59 |
# combine GDP and School Closure datasets
print('sclclsr_forgdp records count:',sclclsr_forgdp.shape[0])
print('global_gdp records count:',global_gdp.shape[0])
sclclsr_gdp_combined = pd.merge(sclclsr_forgdp,global_gdp,how='inner',left_on='Country ID',right_on='Country Code')
print('combined records count:',sclclsr_gdp_combined.shape[0])
sclclsr_gdp_combined.head()
sclclsr_forgdp records count: 197 global_gdp records count: 241 combined records count: 176
| Country ID | Status | counts | Country Name | Country Code | 2020 | |
|---|---|---|---|---|---|---|
| 0 | AFG | Closed due to COVID-19 | 161 | Afghanistan | AFG | 2.011614e+10 |
| 1 | AGO | Closed due to COVID-19 | 195 | Angola | AGO | 5.837598e+10 |
| 2 | ALB | Closed due to COVID-19 | 59 | Albania | ALB | 1.488763e+10 |
| 3 | ARE | Closed due to COVID-19 | 103 | United Arab Emirates | ARE | 3.588688e+11 |
| 4 | ARG | Closed due to COVID-19 | 157 | Argentina | ARG | 3.892881e+11 |
fig = px.scatter(sclclsr_gdp_combined, y ='2020', x = 'counts',
color = 'Country Name',
title ="Correlation analysis between country GDP and School fully closed status due to COVID-19 ",
labels = {'2020':'Country GDP(Log Scale)','counts':'School Fully Closed Days'},
log_y = True
)
fig.show()
sclclsr_gdp_combined.columns
Index(['Country ID', 'Status', 'counts', 'Country Name', 'Country Code',
'2020'],
dtype='object')
#df_corr = sclclsr_gdp_combined[['counts', '2020']]
df_corr = pd.DataFrame([])
df_corr['Fully_Closed_Days'] = sclclsr_gdp_combined['counts']
df_corr['GDP_LogScale'] = np.log(sclclsr_gdp_combined['2020'])
df_corrMatrix = df_corr.corr()
fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(df_corrMatrix, annot=True,linewidths=.5,cmap="YlGnBu")
plt.show()
fig = px.imshow(df_corrMatrix)
fig.show()